10. 線形代数の応用と発展
最小2乗法
code: lstsqr.py
from numpy import array, linspace, sqrt, random, linalg
from numpy.
import matplotlib.pyplot as plt
n, m = 30, 1000
random.seed(2020)
x = linspace(0.0, 1.0, m)
w = random.normal(0.0, sqrt(1.0/m), m)
y = w.cumsum()
A = tA.T
S = linalg.solve(tA.dot(A), tA.dot(y))
L = linalg.lstsq(A, y, rcond=None)0 fig, axs = plt.subplots(1, 2, figsize=(15, 5))
for ax, B in zip(axs, S, L): z = B.dot(tA)
ax.plot(x, y), ax.plot(x, z)
plt.show()
https://gyazo.com/c864650b9b6fe70b805b8211ffb9f86f
左はlinalg.solve、右はlinalg.islsqr
複素平面に値をとる関数
https://gyazo.com/15da09f26c967abe4df02f537c4eef11https://gyazo.com/c6f5a1402476b8a7e9b2dc1eeaeb8f87
最小2乗法(その2)
code: moji.py
from numpy import array, linspace, identity, exp, pi, linalg, ones
from numpy.polynomial.legendre import Legendre
import matplotlib.pyplot as plt
with open('moji.txt', 'r') as fd:
y = eval(fd.read())
m = len(y)
x = linspace(0.0, 1.0, m)
def phi1(n):
return array([(x0 >= x).astype('int')
for x0 in linspace(0, 1, n)]).T
def phi2(n):
return array([exp(2 * pi * k * 1j * x)
for k in range(-n // 2, n // 2 + 1)]).T
def phi3(n):
return array([Legendre.basis(j, domain=0, 1)(x) for j in range(n)]).T
fig, axs = plt.subplots(3, 5, figsize=(16, 8))
c = linalg.lstsq(f(n), y, rcond=None)0 z = f(n).dot(c)
ax.scatter(z.real, z.imag, s=5), ax.plot(z.real, z.imag)
ax.axis('scaled'), ax.set_xlim(-1, 1), ax.set_ylim(-1, 1)
ax.tick_params(labelbottom=False, labelleft=False,
color='white')
plt.show()
https://gyazo.com/ebe0bc71907da3e1d922858f4f2d79fe
上から、2値関数、フーリエ級数、多項式
左から、$ n=8, 16, 32, 64, 128
ベクトル値確率変数
code: probab1.py
from numpy import array
from numpy.random import randint
def P(w): return 1 / len(Omega)
def X(w): return array([w0 + w1, w0 - w1]) for n in range(5):
w = omega()
print(X(w), end=' ')
print()
print(E(X))
code: probab2.py
from numpy.random import choice
def X1(w1): return X((w1, choice(W2)))
for n in range(20):
w1 = choice(W1)
print(X1(w1), end=' ')
主成分分析
code: scatter.py
import numpy as np
import vpython as vp
import matplotlib.pyplot as plt
with open('data.csv', 'r') as fd:
lines = fd.readlines()
data = np.array([eval(line) for line in lines1:]) def scatter3d(data):
o = vp.vec(0, 0, 0)
def scatter2d(data):
A = data.T
fig, axs = plt.subplots(1, 3, figsize=(15, 5))
for n, B in enumerate([A0, 1, A0, 2, A1, 2]):
s = B.dot(B.T)
print(f'{cor:.3}')
plt.show()
if __name__ == '__main__':
scatter3d(data)
scatter2d(data)
実行結果: 相関係数
code: python
0.966
0.954
0.972
https://gyazo.com/2e7d3e79cd45cbd0a4865a83dd82ef2e
https://gyazo.com/344a2252e1108651041a0726af4b243c
code: principal.py
from numpy.linalg import eigh
from scatter import data, scatter2d, scatter3d
n = len(data)
mean = sum(data) / n
C = data - mean
A = C.T
AAt = A.dot(C) / n
E, U = eigh(AAt)
print(E)
scatter3d(C.dot(U))
scatter2d(C.dot(U))
実行結果: 固有値と相関係数
code: python
9.91e-17
-7.87e-17
-3.56e-16
https://gyazo.com/d831a699dde9de9e964e8ef8e45279d1
https://gyazo.com/1437c3ace3b5ce61be97c028dd372fd4
KL展開
code: KL2.py
import numpy as np
import matplotlib.pyplot as plt
s = 123
tmax, N = 100, 1000
dt = tmax / N
np.random.seed(s)
W = np.random.normal(0, dt, (2, N))
Noise = np.random.normal(0, 0.25, (4, N))
B = W.cumsum(axis=1)
A = P.dot(B) + Noise
U, S, V = np.linalg.svd(A)
print(S)
fig, axs = plt.subplots(1, 3, figsize=(20, 5))
T = np.linspace(0, tmax, N)
for i in range(4):
for i in range(2):
for i in range(4):
plt.show()
https://gyazo.com/707a9da0024eec1c74f6a627612ffb11
手書き数字のKL展開
code: mean.py
import numpy as np
import matplotlib.pyplot as plt
N = 60000
with open('train-images.bin', 'rb') as fd:
X = np.fromfile(fd, 'uint8', -1)16: X = X.reshape((N, 28, 28))
with open('train-labels.bin', 'rb') as fd:
Y = np.fromfile(fd, 'uint8', -1)8: D = {y: [] for y in set(Y)}
for x, y in zip(X, Y):
print([len(Dy) for y in sorted(D)]) fig, ax = plt.subplots(1, 10, figsize=(10, 1))
for y in D:
A = 255 - sum([x.astype('float') for x in Dy]) / len(Dy) axy.tick_params(labelbottom=False, labelleft=False, color='white')
plt.show()
https://gyazo.com/88ecdaeba11c6d9de524564ffc73fe19
code: mean2.py
import numpy as np
import matplotlib.pyplot as plt
N = 60000
with open('train-images.bin', 'rb') as fd:
X = np.fromfile(fd, 'uint8', -1)16: X = X.reshape((N, 28, 28))
with open('train-labels.bin', 'rb') as fd:
Y = np.fromfile(fd, 'uint8', -1)8: D = {y: [] for y in set(Y)}
for x, y in zip(X, Y):
print([len(Dy) for y in sorted(D)]) plt.imshow(A, 'gray')
plt.tick_params(labelbottom=False, labelleft=False,
color='white')
plt.show()
https://gyazo.com/90f19312aacc4cad72e14cda73baff01
code: mean_KL2.py
import numpy as np
import matplotlib.pyplot as plt
cutoff = 14
N = 60000
with open('train-images.bin', 'rb') as fd:
X = np.fromfile(fd, 'uint8', -1)16: X = X.reshape((N, 28, 28))
with open('train-labels.bin', 'rb') as fd:
Y = np.fromfile(fd, 'uint8', -1)8: D = {y: [] for y in set(Y)}
for x, y in zip(X, Y):
U, Sigma, V = np.linalg.svd(A)
print(Sigma)
def proj(X, U, V, k):
P, Q = U1.dot(U1.T), V1.T.dot(V1)
return P.dot(X.dot(Q))
fig, axs = plt.subplots(10, 10)
for y in D:
for k in range(10):
B = proj(A, U, V, cutoff)
ax.imshow(255 - B, 'gray')
ax.tick_params(labelbottom=False, labelleft=False,
color='white')
plt.show()
https://gyazo.com/4bd29363d6e90aa5c9b2379218a329a4 https://gyazo.com/0c9a975d6721d9497e432747c905644bhttps://gyazo.com/3ddbbb7335a8ac926370fed164eef2c1 https://gyazo.com/da4d26201c1e697c0b9e0a18d3ddbbfehttps://gyazo.com/0c4d87f643a0b62d2d369d3d6c1eaf48 https://gyazo.com/c81132e66f0182a0cb4c49a5819ec31f
線形回帰による確率変数の実現値の推定
code: estimate1.py
from numpy import array, random, linalg
import matplotlib.pyplot as plt
n = 10
B = linalg.pinv(A)
z1, z2, z3, z4 = B.dot(y1, y2) plt.scatter(x1, x2, s=50, marker='o')
plt.scatter(y1, y2, s=50, marker='x')
plt.scatter(z1, z2, s=50, marker='s')
xy = xz = 0
for u1, u2, v1, v2, w1, w2 in zip(x1, x2, y1, y2, z1, z2):
xy += (u1 - v1)**2 + (u2 - v2)**2
xz += (u1 - w1)**2 + (u2 - w2)**2
print(f'||x-y||^2 = {xy / n}')
print(f'||x-z||^2 = {xz / n}')
plt.show()
https://gyazo.com/8dbcde0280b8a70d008151dca65faaee
code: estimate2.py
from numpy import zeros, arange, random, linalg
import matplotlib.pyplot as plt
N, rho, sigma, tau = 100, 1.0, 0.1, 0.1
random.seed(123)
x, y = zeros(N), zeros(N)
for i in range(N):
xi = rho*xi - 1 + sigma*random.normal(0, 1) yi = xi + tau*random.normal(0, 1) A = zeros((N, 2 * N))
for i in range(N):
for j in range(i + 1):
Ai, j = rho**(i - j) * sigma B = linalg.pinv(A)
v = B.dot(y)
z = zeros(N)
for i in range(N):
print(f'(y-x)^2 = {sum((y-x) ** 2)}')
print(f'(z-x)^2 = {sum((z-x) ** 2)}')
plt.figure(figsize=(20, 5))
t = arange(N)
plt.plot(t, x, color='red')
plt.plot(t, y, color='green')
plt.plot(t, z, color='blue')
plt.show()
実行結果: 2乗誤差
code: python
(y-x)^2 = 1.2353743601911984
(z-x)^2 = 0.43958177113082514
https://gyazo.com/0b6730be01a5ed90c3e01a72b7e2b42a
赤が真値、緑が観測値、青が推定値
カルマン・フィルタ
code: kalman2.py
from numpy import zeros, random
import matplotlib.pyplot as plt
random.seed(123)
N, r, s, t = 100, 1.0, 0.1, 0.1
T = range(N)
x, y, z = zeros(N), zeros(N), zeros(N)
a = s**2
for i in range(N):
xi = r * xi - 1 + s * random.normal(0, 1) yi = xi + t * random.normal(0, 1) c = a - a**2 / (t**2 + a)
a = r * c + s**2
print(f'(y-x)^2 = {sum((y-x)**2)}')
print(f'(z-x)^2 = {sum((z-x)**2)}')
plt.figure(figsize=(20, 5))
plt.plot(T, x, color='red')
plt.plot(T, y, color='green')
plt.plot(T, z, color='blue')
plt.show()
実行結果: 2乗誤差
code: python
(y-x)^2 = 1.2353743601911977
(z-x)^2 = 0.6170319148795381
https://gyazo.com/d10a27992d05c0f9f173f6576a16ec0f
赤が真値、緑が観測値、青が推定値